package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object Demo6Submit {

  /**
    * Spark SQL batch job: counts the number of students per gender.
    *
    * Reads a header-less CSV of students from HDFS, aggregates by gender,
    * and writes the counts back to HDFS as CSV (overwriting previous output).
    *
    * Package the project and submit it to the cluster with:
    * {{{
    * spark-submit --master yarn --deploy-mode client \
    *   --class com.shujia.spark.sql.Demo6Submit spark-1.0.jar [inputPath] [outputPath]
    * }}}
    *
    * @param args optional arguments:
    *             args(0) = input path  (default: "/data/students.txt"),
    *             args(1) = output path (default: "/data/gender_num")
    */
  def main(args: Array[String]): Unit = {

    // Paths are now parameterized; the defaults preserve the original
    // hard-coded behavior so existing invocations keep working.
    val inputPath: String  = if (args.length > 0) args(0) else "/data/students.txt"
    val outputPath: String = if (args.length > 1) args(1) else "/data/gender_num"

    val spark: SparkSession = SparkSession
      .builder()
      // master is intentionally not set here: spark-submit supplies it,
      // so the same jar runs locally or on YARN without code changes
      .appName("submit")
      // small demo dataset — a single shuffle partition avoids producing
      // many tiny output files
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    import spark.implicits._
    import org.apache.spark.sql.functions._

    // 1. Read the student data from HDFS with an explicit schema
    //    (the source file has no header row).
    val studentDF: DataFrame = spark
      .read
      .format("csv")
      .option("sep", ",")
      .schema("id STRING ,name STRING ,age INT , gender STRING ,clazz STRING")
      .load(inputPath)

    // 2. Count the number of students in each gender group.
    val genderNumDF: DataFrame = studentDF
      .groupBy($"gender")
      .agg(count($"gender") as "gender_num")

    // 3. Save the result back to HDFS, replacing any previous run's output.
    genderNumDF
      .write
      .format("csv")
      .option("sep", ",")
      .mode(SaveMode.Overwrite)
      .save(outputPath)

    // Release cluster resources cleanly once the job is done.
    spark.stop()
  }

}
